CONUS404 Regridding (Curvilinear => Rectilinear)#

Create a rectilinear grid (1D lon/lat coordinates) for a specific region. Extract spatial and temporal subset of regridded data to a netcdf file. (Extraction to netcdf may also be done for curvilinear grid.)

%%time
import xarray as xr
import xesmf as xe
import numpy as np
import fsspec
import hvplot.xarray
import geoviews as gv
from matplotlib import path 
import intake
import os
CPU times: user 6.3 s, sys: 608 ms, total: 6.91 s
Wall time: 7.3 s
url = 'https://raw.githubusercontent.com/hytest-org/hytest/main/dataset_catalog/hytest_intake_catalog.yml'
# open the hytest data intake catalog
hytest_cat = intake.open_catalog(url)
list(hytest_cat)
['conus404-drb-eval-tutorial-catalog',
 'nhm-v1.0-daymet-catalog',
 'nhm-v1.1-c404-bc-catalog',
 'nhm-v1.1-gridmet-catalog',
 'conus404-catalog',
 'nwis-streamflow-usgs-gages-onprem',
 'nwis-streamflow-usgs-gages-cloud',
 'nwm21-streamflow-usgs-gages-onprem',
 'nwm21-streamflow-usgs-gages-cloud',
 'nwm21-streamflow-cloud',
 'nwm21-scores',
 'lcmap-cloud',
 'rechunking-tutorial-cloud']
# open the conus404 sub-catalog
cat = hytest_cat['conus404-catalog']
list(cat)
['conus404-hourly-onprem',
 'conus404-hourly-cloud',
 'conus404-hourly-osn',
 'conus404-daily-diagnostic-onprem',
 'conus404-daily-diagnostic-cloud',
 'conus404-daily-onprem',
 'conus404-daily-cloud',
 'conus404-monthly-onprem',
 'conus404-monthly-cloud']
if 'SLURM_CLUSTER_NAME' in os.environ:
    dataset = 'conus404-hourly-onprem'
else:
    dataset = 'conus404-hourly-cloud'
ds = cat[dataset].to_dask()
ds
<xarray.Dataset>
Dimensions:         (time: 368064, y: 1015, x: 1367, bottom_top_stag: 51,
                     bottom_top: 50, soil_layers_stag: 4, x_stag: 1368,
                     y_stag: 1016, snow_layers_stag: 3, snso_layers_stag: 7)
Coordinates:
    lat             (y, x) float32 dask.array<chunksize=(175, 175), meta=np.ndarray>
    lat_u           (y, x_stag) float32 dask.array<chunksize=(175, 175), meta=np.ndarray>
    lat_v           (y_stag, x) float32 dask.array<chunksize=(175, 175), meta=np.ndarray>
    lon             (y, x) float32 dask.array<chunksize=(175, 175), meta=np.ndarray>
    lon_u           (y, x_stag) float32 dask.array<chunksize=(175, 175), meta=np.ndarray>
    lon_v           (y_stag, x) float32 dask.array<chunksize=(175, 175), meta=np.ndarray>
  * time            (time) datetime64[ns] 1979-10-01 ... 2021-09-25T23:00:00
  * x               (x) float64 -2.732e+06 -2.728e+06 ... 2.728e+06 2.732e+06
  * y               (y) float64 -2.028e+06 -2.024e+06 ... 2.024e+06 2.028e+06
Dimensions without coordinates: bottom_top_stag, bottom_top, soil_layers_stag,
                                x_stag, y_stag, snow_layers_stag,
                                snso_layers_stag
Data variables: (12/157)
    ACDEWC          (time, y, x) float32 dask.array<chunksize=(144, 175, 175), meta=np.ndarray>
    ACDRIPR         (time, y, x) float32 dask.array<chunksize=(144, 175, 175), meta=np.ndarray>
    ACDRIPS         (time, y, x) float32 dask.array<chunksize=(144, 175, 175), meta=np.ndarray>
    ACECAN          (time, y, x) float32 dask.array<chunksize=(144, 175, 175), meta=np.ndarray>
    ACEDIR          (time, y, x) float32 dask.array<chunksize=(144, 175, 175), meta=np.ndarray>
    ACETLSM         (time, y, x) float32 dask.array<chunksize=(144, 175, 175), meta=np.ndarray>
    ...              ...
    ZNU             (bottom_top) float32 dask.array<chunksize=(50,), meta=np.ndarray>
    ZNW             (bottom_top_stag) float32 dask.array<chunksize=(51,), meta=np.ndarray>
    ZS              (soil_layers_stag) float32 dask.array<chunksize=(4,), meta=np.ndarray>
    ZSNSO           (time, snso_layers_stag, y, x) float32 dask.array<chunksize=(144, 7, 175, 175), meta=np.ndarray>
    ZWT             (time, y, x) float32 dask.array<chunksize=(144, 175, 175), meta=np.ndarray>
    crs             int32 ...
Attributes: (12/148)
    AER_ANGEXP_OPT:                  1
    AER_ANGEXP_VAL:                  1.2999999523162842
    AER_AOD550_OPT:                  1
    AER_AOD550_VAL:                  0.11999999731779099
    AER_ASY_OPT:                     1
    AER_ASY_VAL:                     0.8999999761581421
    ...                              ...
    WEST-EAST_PATCH_START_STAG:      1
    WEST-EAST_PATCH_START_UNSTAG:    1
    W_DAMPING:                       1
    YSU_TOPDOWN_PBLMIX:              0
    history:                         Tue Mar 29 16:35:22 2022: ncrcat -A -vW ...
    history_of_appended_files:       Tue Mar 29 16:35:22 2022: Appended file ...
nc_outfile = 'CONUS404_DRB_rectilinear.nc'
bbox = [-75.9, -74.45, 38.7, 42.55]
dx = dy = 3./111.    # 3km grid
vars_out = ['T2', 'SNOW']
start = '2017-04-01 00:00'
stop  = '2017-05-01 00:00'

Use xESMF to regrid#

xESMF is a xarray-enabled interface to the ESMF regridder from NCAR. ESMF has options for regridding between curvilinear, rectilinear, and unstructured grids, with conservative regridding options, and much more

from dask.distributed import Client
client = Client()
def bbox2ij(lon,lat,bbox=[-160., -155., 18., 23.]):
    """Return indices for i,j that will completely cover the specified bounding box.     
    i0,i1,j0,j1 = bbox2ij(lon,lat,bbox)
    lon,lat = 2D arrays that are the target of the subset
    bbox = list containing the bounding box: [lon_min, lon_max, lat_min, lat_max]

    Example
    -------  
    >>> i0,i1,j0,j1 = bbox2ij(lon_rho,[-71, -63., 39., 46])
    >>> h_subset = nc.variables['h'][j0:j1,i0:i1]       
    """
    bbox=np.array(bbox)
    mypath=np.array([bbox[[0,1,1,0]],bbox[[2,2,3,3]]]).T
    p = path.Path(mypath)
    points = np.vstack((lon.ravel(),lat.ravel())).T   
    n,m = np.shape(lon)
    inside = p.contains_points(points).reshape((n,m))
    ii,jj = np.meshgrid(range(m),range(n))
    return min(ii[inside]),max(ii[inside]),min(jj[inside]),max(jj[inside])

Before we regrid to rectilinear, let’s subset a region that covers our area of interest. Becuase lon,lat are 2D arrays, we can’t just use xarray to slice these coordinate variables. So we have a routine that finds the i,j locations of a specified bounding box, and then slice on those.#

i0,i1,j0,j1 = bbox2ij(ds['lon'].values, ds['lat'].values, bbox=bbox)
print(i0,i1,j0,j1)
1123 1178 555 663
ds_subset = ds.isel(x=slice(i0-1,i1+1), y=slice(j0-1,j1+1))
ds_subset = ds_subset.sel(time=slice(start,stop))
ds_subset
<xarray.Dataset>
Dimensions:         (time: 721, y: 110, x: 57, bottom_top_stag: 51,
                     bottom_top: 50, soil_layers_stag: 4, x_stag: 1368,
                     y_stag: 1016, snow_layers_stag: 3, snso_layers_stag: 7)
Coordinates:
    lat             (y, x) float32 dask.array<chunksize=(110, 57), meta=np.ndarray>
    lat_u           (y, x_stag) float32 dask.array<chunksize=(110, 175), meta=np.ndarray>
    lat_v           (y_stag, x) float32 dask.array<chunksize=(175, 57), meta=np.ndarray>
    lon             (y, x) float32 dask.array<chunksize=(110, 57), meta=np.ndarray>
    lon_u           (y, x_stag) float32 dask.array<chunksize=(110, 175), meta=np.ndarray>
    lon_v           (y_stag, x) float32 dask.array<chunksize=(175, 57), meta=np.ndarray>
  * time            (time) datetime64[ns] 2017-04-01 ... 2017-05-01
  * x               (x) float64 1.756e+06 1.76e+06 ... 1.976e+06 1.98e+06
  * y               (y) float64 1.88e+05 1.92e+05 1.96e+05 ... 6.2e+05 6.24e+05
Dimensions without coordinates: bottom_top_stag, bottom_top, soil_layers_stag,
                                x_stag, y_stag, snow_layers_stag,
                                snso_layers_stag
Data variables: (12/157)
    ACDEWC          (time, y, x) float32 dask.array<chunksize=(24, 110, 57), meta=np.ndarray>
    ACDRIPR         (time, y, x) float32 dask.array<chunksize=(24, 110, 57), meta=np.ndarray>
    ACDRIPS         (time, y, x) float32 dask.array<chunksize=(24, 110, 57), meta=np.ndarray>
    ACECAN          (time, y, x) float32 dask.array<chunksize=(24, 110, 57), meta=np.ndarray>
    ACEDIR          (time, y, x) float32 dask.array<chunksize=(24, 110, 57), meta=np.ndarray>
    ACETLSM         (time, y, x) float32 dask.array<chunksize=(24, 110, 57), meta=np.ndarray>
    ...              ...
    ZNU             (bottom_top) float32 dask.array<chunksize=(50,), meta=np.ndarray>
    ZNW             (bottom_top_stag) float32 dask.array<chunksize=(51,), meta=np.ndarray>
    ZS              (soil_layers_stag) float32 dask.array<chunksize=(4,), meta=np.ndarray>
    ZSNSO           (time, snso_layers_stag, y, x) float32 dask.array<chunksize=(24, 7, 110, 57), meta=np.ndarray>
    ZWT             (time, y, x) float32 dask.array<chunksize=(24, 110, 57), meta=np.ndarray>
    crs             int32 ...
Attributes: (12/148)
    AER_ANGEXP_OPT:                  1
    AER_ANGEXP_VAL:                  1.2999999523162842
    AER_AOD550_OPT:                  1
    AER_AOD550_VAL:                  0.11999999731779099
    AER_ASY_OPT:                     1
    AER_ASY_VAL:                     0.8999999761581421
    ...                              ...
    WEST-EAST_PATCH_START_STAG:      1
    WEST-EAST_PATCH_START_UNSTAG:    1
    W_DAMPING:                       1
    YSU_TOPDOWN_PBLMIX:              0
    history:                         Tue Mar 29 16:35:22 2022: ncrcat -A -vW ...
    history_of_appended_files:       Tue Mar 29 16:35:22 2022: Appended file ...
ds_subset.nbytes/1e9
2.561452208
da = ds_subset.T2.sel(time='2017-04-25 00:00', method='nearest')
viz = da.hvplot.quadmesh(x='lon', y='lat', geo=True, rasterize=True, cmap='turbo')
base = gv.tile_sources.OSM
base * viz.opts(alpha=0.5)
ds_subset.nbytes/1e9
2.561452208
%%time
ds_subset = ds_subset.chunk({'x':-1, 'y':-1, 'time':24})
CPU times: user 69.4 ms, sys: 19 µs, total: 69.4 ms
Wall time: 67.9 ms
%%time
ds_out = xr.Dataset({'lon': (['lon'], np.arange(bbox[0], bbox[1], dx)),
                     'lat': (['lat'], np.arange(bbox[2], bbox[3], dy))})

regridder = xe.Regridder(ds_subset, ds_out, 'bilinear')
regridder
CPU times: user 159 ms, sys: 6.2 ms, total: 165 ms
Wall time: 242 ms
xESMF Regridder 
Regridding algorithm:       bilinear 
Weight filename:            bilinear_110x57_143x54.nc 
Reuse pre-computed weights? False 
Input grid shape:           (110, 57) 
Output grid shape:          (143, 54) 
Periodic in longitude?      False
%%time
ds_out = regridder(ds_subset[vars_out])
print(ds_out)
<xarray.Dataset>
Dimensions:  (time: 721, lat: 143, lon: 54)
Coordinates:
  * time     (time) datetime64[ns] 2017-04-01 2017-04-01T01:00:00 ... 2017-05-01
  * lon      (lon) float64 -75.9 -75.87 -75.85 -75.82 ... -74.52 -74.49 -74.47
  * lat      (lat) float64 38.7 38.73 38.75 38.78 ... 42.46 42.48 42.51 42.54
Data variables:
    T2       (time, lat, lon) float32 dask.array<chunksize=(24, 143, 54), meta=np.ndarray>
    SNOW     (time, lat, lon) float32 dask.array<chunksize=(24, 143, 54), meta=np.ndarray>
Attributes:
    regrid_method:  bilinear
CPU times: user 1.88 s, sys: 12.9 ms, total: 1.9 s
Wall time: 1.85 s
ds_out['SNOW']
<xarray.DataArray 'SNOW' (time: 721, lat: 143, lon: 54)>
dask.array<_regrid, shape=(721, 143, 54), dtype=float32, chunksize=(24, 143, 54), chunktype=numpy.ndarray>
Coordinates:
  * time     (time) datetime64[ns] 2017-04-01 2017-04-01T01:00:00 ... 2017-05-01
  * lon      (lon) float64 -75.9 -75.87 -75.85 -75.82 ... -74.52 -74.49 -74.47
  * lat      (lat) float64 38.7 38.73 38.75 38.78 ... 42.46 42.48 42.51 42.54
list(ds_out.variables)
['T2', 'SNOW', 'time', 'lon', 'lat']
list(ds_out.data_vars)
['T2', 'SNOW']
ds_out['T2'].encoding
{}
ds_out.time
<xarray.DataArray 'time' (time: 721)>
array(['2017-04-01T00:00:00.000000000', '2017-04-01T01:00:00.000000000',
       '2017-04-01T02:00:00.000000000', ..., '2017-04-30T22:00:00.000000000',
       '2017-04-30T23:00:00.000000000', '2017-05-01T00:00:00.000000000'],
      dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 2017-04-01 2017-04-01T01:00:00 ... 2017-05-01
encoding={}
for var in ds_out.variables:
    encoding[var] = dict(zlib=True, complevel=2, 
                         fletcher32=False, shuffle=True,
                         _FillValue=None
                        )
%%time

ds_out.to_netcdf(nc_outfile, encoding=encoding, 
                 mode='w')
CPU times: user 841 ms, sys: 52.1 ms, total: 893 ms
Wall time: 14.7 s
ds_nc = xr.open_dataset(nc_outfile)
ds_nc
<xarray.Dataset>
Dimensions:  (time: 721, lat: 143, lon: 54)
Coordinates:
  * time     (time) datetime64[ns] 2017-04-01 2017-04-01T01:00:00 ... 2017-05-01
  * lon      (lon) float64 -75.9 -75.87 -75.85 -75.82 ... -74.52 -74.49 -74.47
  * lat      (lat) float64 38.7 38.73 38.75 38.78 ... 42.46 42.48 42.51 42.54
Data variables:
    T2       (time, lat, lon) float32 ...
    SNOW     (time, lat, lon) float32 ...
Attributes:
    regrid_method:  bilinear
(ds_nc['T2']-273.15).hvplot(x='lon',y='lat', geo=True,
                rasterize=True, cmap='turbo', 
                tiles='OSM', clim=(2,15))
ds_outcl = ds_subset[vars_out]
list(ds_outcl.data_vars)
['T2', 'SNOW']
encoding={}
for var in ds_outcl.variables:
    encoding[var] = dict(zlib=True, complevel=2, 
                         fletcher32=False, shuffle=True,
                         _FillValue=None
                        )
%%time

ds_outcl.to_netcdf('CONUS404_DRB_curvilinear.nc', encoding=encoding, 
                 mode='w')
CPU times: user 741 ms, sys: 31.5 ms, total: 772 ms
Wall time: 13.1 s
client.close()